***********************************************************
***		Do-File to harmonize person measures			***
***		across time and to create a single repeated		***
***		cross-sectional establishment dataset			***
***********************************************************

* Start from an empty data set and switch off output paging
clear
set more off

* Close a log that an earlier (aborted) run may have left open; without this
* -log using- stops with "log file already open"
capture log close

* Path is quoted so that a $log directory containing blanks still works
log using "$log/02_merge_persons.log", replace

*** For each year: open the establishment basis file, derive harmonized
*** region/industry codes and plug this information into the person data.
*** One intermediate file per year is written to $data/pers_basis_<year>.dta.

forvalues year = 1993/2008 {

	* Read only the four variables needed, restricted to the current year
	* (path quoted like the other file references in this do-file)
	use betnr jahr w73_3_gen ao_bula if jahr == `year' ///
		using "$orig/LIAB_QM2_9310_v1_bhp_basis_v1.dta", clear

	* west-berlin=berlin; saarland=rheinld.pf. for a time consistent bula variable
	recode ao_bula (0=11)(10=7), gen(bula_basis)

	* Collapse the 3-digit industry code into 1-digit groups
	* (the two top ranges 870/909 and 910/999 both end up in group 8)
	recode w73_3_gen ///
		(   0/89 = 1) ///
		( 90/589 = 2) ///
		(590/619 = 3) ///
		(620/629 = 4) ///
		(630/689 = 5) ///
		(690/699 = 6) ///
		(700/869 = 7) ///
		(870/909 = 8) ///
		(910/999 = 8), gen(w73_1_basis)

	* One establishment record (master) to many person records (using)
	merge 1:m betnr using "$orig/LIAB_qm2_9310_v1_pers_`year'.dta"
	* _merge==2 records are not assigned to any establishment because they
	* stem from the benefit-recipient history (Leistungsempfaenger-Historik)
	keep if quelle==1
	rename _merge _merge_basispers

	* keep only vars for later use
	keep persnr idnum jahr _merge_basispers quelle erwstat tage_erw tage_bet stib bild beruf gebjahr frau nation_gr tentgelt w73_3_gen w73_1_basis ao_bula bula_basis
	compress
	save "$data/pers_basis_`year'.dta", replace
}

*** Stack the yearly person files into one repeated cross-section.
*** Each intermediate file is deleted as soon as it has been appended.
set more off
local first_year 1993
use "$data/pers_basis_`first_year'.dta", clear

forvalues yr = 1994/2008 {
	local part "$data/pers_basis_`yr'.dta"
	quietly append using "`part'"
	erase "`part'"
}
erase "$data/pers_basis_`first_year'.dta"

*** Merge the stacked person data with the establishment panel (Betriebspanel):
*** many person records per establishment-year (idnum jahr)
merge m:1 idnum jahr using "$data/bp_1993-2008.dta", generate(_merge_persbp)

* Attach the value label named w73_1 to the basis industry variable
* (the label is not defined in this do-file; presumably it arrives with the panel file)
label values w73_1_basis w73_1

* NOTE(review): this removes the panel input right after the merge, i.e. before
* the linked data set is saved further down; if a later step fails the file
* has to be rebuilt.
erase "$data/bp_1993-2008.dta"

*** Now apply the exclusions at the establishment level

* West Germany only
drop if east == 1

* Drop all non-profits (i.e. when earnings situation "does not apply", coded .a,
* in the current or either lagged variable) but keep regular missings
foreach lagvar in ertragslage_l0 ertragslage_l1 ertragslage_l2 {
	drop if `lagvar' == .a
}

* no public sector employers (group 8 in either industry classification)
drop if w73_1 == 8 | w73_1_basis == 8

* Person-level restriction, as described by the original author:
*   - only workers covered by regular social insurance scheme
*   - only full time workers
*   - no persons in training
keep if quelle == 1 & erwstat == 101 & !inlist(stib, 0, 8, 9)

* Keep only records matched in both merges and with both industry codes present
keep if _merge_basispers == 3
keep if _merge_persbp == 3
drop if missing(w73_1_basis, w73_1)

* Report the remaining missings in the earnings situation, then drop them
misstable summarize ertragslage_l0
drop if missing(ertragslage_l0)

*** Assess matching quality: compare federal state (bula) and 1-digit industry
*** between Betriebspanel and basis file
*    1 = both agree, -1 = only bula differs, -2 = only industry differs, -3 = both differ
generate bp_basis_fit = cond(bula == bula_basis, ///
	cond(w73_1 == w73_1_basis,  1, -2), ///
	cond(w73_1 == w73_1_basis, -1, -3))

label define bp_basis_fit ///
	 1 "fit" ///
	-1 "only bula differs" ///
	-2 "only ind differs" ///
	-3 "bula and ind differ"
label values bp_basis_fit bp_basis_fit


/*	For cases where ind differs, I have looked at the exact industries. In almost all cases the differences make
	substantive sense:
	Bauindustrie in Betriebspanel, Montage und Reparatur ges.techn.Anlagen (=Sanitaer) in Basis
	Bauindustrie in Betriebspanel, Herstellung von Bauelementen in Basis
	Energiew.u.Wasservers., Bergb. in BP, Strassenreinigung in Basis
	Org.o.E./oeff.Verw./Soz.vers. in BP, Hochschulen in Basis
	Dienstleist., soweit n.genannt in BP, Org. der freien Wohlfahrt in Basis
	Kredit- u.Versicherungsgewerbe in BP, Bundespost in Basis
	Kredit- u.Versicherungsgewerbe in BP, Sozialversicherung in Basis
	Verkehr u.Nachrichtenuebermitt. in BP, Rundfunk und Fernsehanstalten in Basis
	Handel in BP, Rep. v. Kfz und Fahrraedern in Basis
	I therefore decided to keep cases where industries differ and to use the classification from the Betriebspanel.
	tab w73_3_gen if bp_basis_fit==-2 & w73_1==3 */

* Show the distribution of the fit indicator, then discard every record whose
* federal state disagrees (codes -1 and -3); the helper variable is not kept
tabulate bp_basis_fit
drop if inlist(bp_basis_fit, -1, -3)
drop bp_basis_fit

* generate establishment size: number of remaining person records
* per establishment (estid) and year
by estid jahr, sort: generate nrftemployees = _N

*** recode person variables across waves

* gender: English name for the female indicator
rename frau female

* age (survey year minus year of birth) plus its square and cube
generate age = jahr - gebjahr
forvalues pw = 2/3 {
	generate age`pw' = age^`pw'
}

* education
* improve education variable following Fitzenberger et al. 2005
* Category names used in the comments below (codes of bild/ibild):
*   1 novtnoabi, 2 vtnoabi, 3 novtabi, 4 vtabi, 5 collfh, 6 colluni
* All rules compare a record with its neighbours of the same person, so the
* data must be sorted by person and year. Each rule is repeated 21 times
* (i = 0,...,20) so that information can travel along a person's records.
sort persnr jahr
order persnr jahr
gen ibild = bild

* extrapolate novtnoabi to subsequent spells with missing information
forvalues i = 0/20 {
	qui replace ibild = 1 if missing(ibild) & ibild[_n-1]==1 & persnr == persnr[_n-1]
}
* extrapolate vtnoabi to subsequent spells with missing information or novtnoabi
forvalues i = 0/20 {
	qui replace ibild = 2 if (missing(ibild) | ibild==1) & ibild[_n-1]==2 & persnr == persnr[_n-1]
}
* extrapolate novtabi to subsequent spells with missing information or novtnoabi
forvalues i = 0/20 {
	qui replace ibild = 3 if (missing(ibild) | ibild==1) & ibild[_n-1]==3 & persnr == persnr[_n-1]
}
* impute vtabi if novtabi is accepted and vtnoabi has been accepted before or vice versa
* (here the counter is the look-back distance; i = 0 compares a record with itself and changes nothing)
forvalues i = 0/20 {
	qui replace ibild = 4 if ibild == 3 & ibild[_n-`i']==2 & persnr == persnr[_n-`i']
	qui replace ibild = 4 if ibild == 2 & ibild[_n-`i']==3 & persnr == persnr[_n-`i']
}
* extrapolate vtabi to subsequent spells with missing information or lower degree
forvalues i = 0/20 {
	qui replace ibild = 4 if (missing(ibild) | ibild==3 | ibild==2 | ibild==1) & ibild[_n-1]==4 & persnr == persnr[_n-1]
}
* extrapolate collfh to subsequent spells with missing information or lower degree
forvalues i = 0/20 {
	qui replace ibild = 5 if (missing(ibild) | ibild==4 | ibild==3 | ibild==2 | ibild==1)& ibild[_n-1]==5 & persnr == persnr[_n-1]
}
* extrapolate colluni to subsequent spells with missing information or lower degree
forvalues i = 0/20 {
	qui replace ibild = 6 if (missing(ibild) | ibild==5 | ibild==4 | ibild==3 | ibild==2 | ibild==1) & ibild[_n-1]==6 & persnr == persnr[_n-1]
}
* backward extrapolation (with specific age limits)
forvalues i = 0/20 {
	qui replace ibild = 1 if missing(ibild) & ibild[_n+1]==1 & persnr == persnr[_n+1]
	qui replace ibild = 2 if missing(ibild) & ibild[_n+1]==2 & persnr == persnr[_n+1] & age >=20
	qui replace ibild = 3 if missing(ibild) & ibild[_n+1]==3 & persnr == persnr[_n+1] & age >=21
	qui replace ibild = 4 if missing(ibild) & ibild[_n+1]==4 & persnr == persnr[_n+1] & age >=23
	qui replace ibild = 5 if missing(ibild) & ibild[_n+1]==5 & persnr == persnr[_n+1] & age >=27
	qui replace ibild = 6 if missing(ibild) & ibild[_n+1]==6 & persnr == persnr[_n+1] & age >=29
}

* impute vtnoabi if person is Facharbeiter or Polier or Meister
replace ibild=2 if missing(ibild) & (stib==2 | stib==3)

* impute vtabi if novtabi is accepted and vtnoabi has been accepted before or vice versa
forvalues i = 0/20 {
	qui replace ibild = 4 if ibild == 3 & ibild[_n-`i']==2 & persnr == persnr[_n-`i']
	qui replace ibild = 4 if ibild == 2 & ibild[_n-`i']==3 & persnr == persnr[_n-`i']
}

* extrapolate vtnoabi once again with this new information
forvalues i = 0/20 {
	qui replace ibild = 2 if (missing(ibild) | ibild==1) & ibild[_n-1]==2 & persnr == persnr[_n-1]
}
* and backward
forvalues i = 0/20 {
	qui replace ibild = 2 if missing(ibild) & ibild[_n+1]==2 & persnr == persnr[_n+1] & age >=20
}

* Collapse to four groups: novtnoabi and novtabi becomes novt, colluni and collfh becomes college
recode ibild (1=1)(2=2)(3=1)(4=3)(5 6=4), gen(bildung)
label define bildung 1 "novt" 2 "vtnoabi" 3 "vtabi" 4 "college"
* Attach the label to the variable (it was defined but never assigned before)
label values bildung bildung

* One dummy per education group, renamed to the group names
tab bildung, gen(edu)
	rename edu1 novt
	rename edu2 vtnoabi
	rename edu3 vtabi
	rename edu4 college

* (potential) experience = age minus (school entry at 6 + assumed years of
* schooling/training for the education category), floored at zero.
* The imputed education variable ibild is used here instead of the raw bild:
* the education dummies are derived from ibild as well, and with raw bild every
* person whose education had to be imputed would end up with missing exper and
* be removed by the missing-value exclusion on exper further down.
* NOTE(review): this changes the analysis sample relative to the raw-bild
* version - please confirm that this is the intended definition.
gen exper = .
replace exper = age - (6 + 9) 		if ibild == 1
replace exper = age - (6 + 9  + 3) 	if ibild == 2
replace exper = age - (6 + 13) 		if ibild == 3
replace exper = age - (6 + 13 + 3) 	if ibild == 4
replace exper = age - (6 + 13 + 5) 	if ibild == 5
replace exper = age - (6 + 13 + 5) 	if ibild == 6
* negative values are set to zero (missing values stay missing)
replace exper = 0 if exper < 0
gen exper2 = exper ^ 2
gen exper3 = exper ^ 3

* skill- and gender-specific experience profiles:
* interact exper, exper2 and exper3 with each group dummy
foreach grp of varlist female novt vtnoabi vtabi college {
	foreach term in exper exper2 exper3 {
		quietly generate `term'x`grp' = `term' * `grp'
	}
}

* Blue/white collar: group occupational status (stib) into three worker types
recode stib (1 = 0) (2 3 = 1) (4 7 = 2), gen(workertype)
label define workertype ///
	0 "unskilled worker" ///
	1 "skilled blue collar" ///
	2 "white collar"
label values workertype workertype

* Dummies for the worker types; the first three get descriptive names
quietly tabulate workertype, generate(wt)
local wtnames unskill bcollar wcollar
forvalues k = 1/3 {
	local newname : word `k' of `wtnames'
	rename wt`k' `newname'
}

* Citizenship (Staatsangehoerigkeit): nation_gr 10 becomes 0, 11-99 become 1
recode nation_gr (10 = 0) (11/99 = 1), gen(foreigner)

* w73_1 industry dummies: one indicator per category of w73_1,
* renamed (in order) to short industry names
quietly tabulate w73_1, generate(ind)
local indnames landwi verarb bauwi handel verkuit kredit dieleist
local k = 0
foreach nm of local indnames {
	local ++k
	rename ind`k' `nm'
}

*** Further exclusions at the person level ***

* Age window 20-60 (records with missing age are dropped as well)
keep if inrange(age, 20, 60)

* The wage floor is applied only when the global $location is set to "iab"
if "$location" == "iab" {
	drop if tentgelt < 20
}

* Records without (imputed) education; original author's note: drops < 3 percent
drop if missing(ibild)

* Records with a missing value in any of the remaining person variables
drop if missing(female, foreigner, age, beruf, tentgelt, exper, workertype)

* Variables that are no longer needed
drop stib gebjahr nation_gr tage_bet tage_erw well*

* period-occupation fixed effects
* Four 4-year periods: 1993-96, 1997-2000, 2001-04, 2005-08 (missing otherwise)
generate period = cond(inrange(jahr, 1993, 1996), 1, ///
	cond(inrange(jahr, 1997, 2000), 2, ///
	cond(inrange(jahr, 2001, 2004), 3, ///
	cond(inrange(jahr, 2005, 2008), 4, .))))

* Build the identifier as the digit string "1" + period + occupation code and
* convert it to a number. The leading "1" is there just to be on the safe
* side, so that the number never starts with a 0.
generate per_occ = "1" + string(period) + string(beruf)
destring per_occ, gen(per_occ_fe)
drop per_occ

* period-education controls: interact every education dummy with every
* period dummy; the period dummies themselves are only temporary
quietly tabulate period, generate(per)

foreach edu of varlist novt vtnoabi vtabi college {
	forvalues p = 1/4 {
		quietly generate `edu'xper`p' = `edu' * per`p'
	}
}

drop per1 per2 per3 per4

* Final layout: sorted by establishment, year and person, with the same
* three identifiers moved to the front
sort estid jahr persnr
order estid jahr persnr
compress

* Keep years up to 2008 only (records with missing jahr are removed as well)
keep if jahr <= 2008
save "$data/linked_1993-2008.dta", replace

***********
*** END ***
***********
log close













